library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
library(readxl)
library(hablar)
##
## Attaching package: 'hablar'
## The following object is masked from 'package:dplyr':
##
## na_if
knitr::opts_chunk$set(
fig.width = 6,
fig.asp = .6,
out.width = "90%"
)
theme_set(theme_minimal() + theme(legend.position = "bottom"))
options(
ggplot2.continuous.colour = "viridis",
ggplot2.continuous.fill = "viridis"
)
scale_colour_discrete = scale_colour_viridis_d
scale_fill_discrete = scale_fill_viridis_d
set.seed(1)
First, upload all the excel datasets.
This is the data for 2011:
data11 = read_excel('./data/hiv_data/2011_1.xlsx', range = 'A8:AH48', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x10, x12, x14, x16, x18, x19, x25, x29, x33, x34) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x10,
without_aids_N = x12,
without_aids_per = x14,
concurrent_aids_N = x16,
concurrent_aids_per = x18,
aids = x19,
plwha = x25,
death = x29,
borough = x33,
sex = x34
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2011) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data11_d = read_excel('./data/hiv_data/2011_1.xlsx', range = 'A49:AH324', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x7, x10, x13, x14, x16, x18, x20, x26, x29, x33, x34) %>%
filter(!is.na(x7)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x7,
total_hiv_diag_per = x10,
without_aids_N = x13,
without_aids_per = x14,
concurrent_aids_N = x16,
concurrent_aids_per = x18,
aids = x20,
plwha = x26,
death = x29,
borough = x33,
sex = x34
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2011) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2012:
data12 = read_excel('./data/hiv_data/2012.xlsx', range = 'A8:AG48', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x10, x12, x14, x16, x18, x19, x24, x29, x32, x33) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x10,
without_aids_N = x12,
without_aids_per = x14,
concurrent_aids_N = x16,
concurrent_aids_per = x18,
aids = x19,
plwha = x24,
death = x29,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2012) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data12_d = read_excel('./data/hiv_data/2012.xlsx', range = 'A49:AH324', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x7, x10, x13, x14, x16, x18, x20, x26, x29, x32, x33) %>%
filter(!is.na(x7)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x7,
total_hiv_diag_per = x10,
without_aids_N = x13,
without_aids_per = x14,
concurrent_aids_N = x16,
concurrent_aids_per = x18,
aids = x20,
plwha = x26,
death = x29,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2012) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2013:
data13 = read_excel('./data/hiv_data/2013.xlsx', range = 'A8:AG48', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x9, x11, x13, x15, x17, x18, x24, x28, x32, x33) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x9,
without_aids_N = x11,
without_aids_per = x13,
concurrent_aids_N = x15,
concurrent_aids_per = x17,
aids = x18,
plwha = x24,
death = x28,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2013) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data13_d = read_excel('./data/hiv_data/2013.xlsx', range = 'A49:AG324', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x6, x9, x12, x13, x15, x17, x19, x25, x28, x32, x33) %>%
filter(!is.na(x6)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x6,
total_hiv_diag_per = x9,
without_aids_N = x12,
without_aids_per = x13,
concurrent_aids_N = x15,
concurrent_aids_per = x17,
aids = x19,
plwha = x25,
death = x28,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2013) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2014:
data14 = read_excel('./data/hiv_data/2014.xlsx', range = 'A8:AG48', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x8, x10, x11, x13, x15, x16, x21, x25, x29, x30) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x8,
without_aids_N = x10,
without_aids_per = x11,
concurrent_aids_N = x13,
concurrent_aids_per = x15,
aids = x16,
plwha = x21,
death = x25,
borough = x29,
sex = x30
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2014) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 31 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 31 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 31 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data14_d = read_excel('./data/hiv_data/2014.xlsx', range = 'A50:AD335', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x6, x8, x10, x11, x13, x15, x16, x22, x25, x29, x30) %>%
filter(!is.na(x6)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x6,
total_hiv_diag_per = x8,
without_aids_N = x10,
without_aids_per = x11,
concurrent_aids_N = x13,
concurrent_aids_per = x15,
aids = x16,
plwha = x22,
death = x25,
borough = x29,
sex = x30
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2014) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2015:
data15 = read_excel('./data/hiv_data/2015.xlsx', range = 'A8:AG49', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x9, x11, x12, x14, x16, x18, x23, x28, x32, x33) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x9,
without_aids_N = x11,
without_aids_per = x12,
concurrent_aids_N = x14,
concurrent_aids_per = x16,
aids = x18,
plwha = x23,
death = x28,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2015) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data15_d = read_excel('./data/hiv_data/2015.xlsx', range = 'A50:AG335', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x6, x9, x11, x12, x14, x16, x18, x25, x28, x32, x33) %>%
filter(!is.na(x6)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x6,
total_hiv_diag_per = x9,
without_aids_N = x11,
without_aids_per = x12,
concurrent_aids_N = x14,
concurrent_aids_per = x16,
aids = x18,
plwha = x25,
death = x28,
borough = x32,
sex = x33
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2015) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2016:
data16 = read_excel('./data/hiv_data/2016.xlsx', range = 'A8:AC49', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x7, x9, x10, x12, x14, x15, x20, x24, x28, x29) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x7,
without_aids_N = x9,
without_aids_per = x10,
concurrent_aids_N = x12,
concurrent_aids_per = x14,
aids = x15,
plwha = x20,
death = x24,
borough = x28,
sex = x29
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2016) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data16_d = read_excel('./data/hiv_data/2016.xlsx', range = 'A50:AC335', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x7, x9, x10, x12, x14, x15, x21, x24, x28, x29) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x7,
without_aids_N = x9,
without_aids_per = x10,
concurrent_aids_N = x12,
concurrent_aids_per = x14,
aids = x15,
plwha = x21,
death = x24,
borough = x28,
sex = x29
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2016) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2017:
data17 = read_excel('./data/hiv_data/2017.xlsx', range = 'A8:AD49', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x5, x8, x10, x11, x13, x15, x16, x21, x25, x29, x30) %>%
filter(!is.na(x5)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x5,
total_hiv_diag_per = x8,
without_aids_N = x10,
without_aids_per = x11,
concurrent_aids_N = x13,
concurrent_aids_per = x15,
aids = x16,
plwha = x21,
death = x25,
borough = x29,
sex = x30
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2017) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
## Warning: Expected 2 pieces. Additional pieces discarded in 32 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
data17_d = read_excel('./data/hiv_data/2017.xlsx', range = 'A50:AD335', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x6, x8, x10, x11, x13, x15, x16, x22, x25, x29, x30) %>%
filter(!is.na(x6)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x6,
total_hiv_diag_per = x8,
without_aids_N = x10,
without_aids_per = x11,
concurrent_aids_N = x13,
concurrent_aids_per = x15,
aids = x16,
plwha = x22,
death = x25,
borough = x29,
sex = x30
) %>%
separate(aids, c("aids_N", "aids_per"), " ") %>%
separate(plwha, c("plwha_N", "plwha_per"), " ") %>%
separate(death, c("death_N", "death_per"), " ") %>%
select(-aids_per, -plwha_per, -death_per) %>%
mutate(year = 2017) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
## Warning: Expected 2 pieces. Additional pieces discarded in 206 rows [2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 24,
## 45, 68, 89, 112, 133, 156, 177, 198].
This is the data for 2018:
data18 = read_excel('./data/hiv_data/2018.xlsx', range = 'A7:BU128', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x21, x24, x27, x31, x38, x42, x50, x58, x67, x72, x73) %>%
filter(!is.na(x21)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x21,
total_hiv_diag_per = x24,
without_aids_N = x27,
without_aids_per = x31,
concurrent_aids_N = x38,
concurrent_aids_per = x42,
aids_N = x50,
plwha_N = x58,
death_N = x67,
borough = x72,
sex = x73
) %>%
mutate(year = 2018) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
This is the data for 2019:
data19 = read_excel('./data/hiv_data/2019.xlsx', range = 'A7:O128', col_names = FALSE) %>%
janitor::clean_names() %>%
select(x1, x4, x5, x6, x7, x8, x9, x10, x11, x12, x14, x15) %>%
filter(!is.na(x4)) %>%
filter(!str_detect(x1, "poverty")) %>%
rename(
category = x1,
total_hiv_diag_N = x4,
total_hiv_diag_per = x5,
without_aids_N = x6,
without_aids_per = x7,
concurrent_aids_N = x8,
concurrent_aids_per = x9,
aids_N = x10,
plwha_N = x11,
death_N = x12,
borough = x14,
sex = x15
) %>%
mutate(year = 2019) %>%
mutate_if(is.numeric, as.character)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
data19 = data19 %>% retype() %>%
mutate(
total_hiv_diag_N = 2*total_hiv_diag_N,
without_aids_N = 2*without_aids_N,
concurrent_aids_N = 2*concurrent_aids_N,
aids_N = 2*aids_N,
death_N = 2*death_N
) %>% mutate_if(is.numeric, as.character)
Combine all the datasets together:
hiv_complete = list(
data11,
data11_d,
data12,
data12_d,
data13,
data13_d,
data14,
data14_d,
data15,
data15_d,
data16,
data16_d,
data17,
data17_d,
data18,
data19
) %>%
bind_rows() %>% relocate(year) %>% retype()
hiv_complete = hiv_complete %>%
mutate(
category = str_replace(category, "0‐12", "0-12"),
category = str_replace(category, "13‐19", "13-19"),
category = str_replace(category, "20‐29", "20-29"),
category = str_replace(category, "30‐39", "30-39"),
category = str_replace(category, "40‐49", "40-49"),
category = str_replace(category, "50‐59", "50-59"),
category = str_replace(category, "Latina/Hispanic", "Latino/Hispanic"),
category = str_replace(category, "MSM‐IDU", "MSM-IDU"),
category = str_replace(category, "Other rsk", "Other risk"),
category = str_replace(category, "Total", "TOTAL"),
category = str_replace(category, "Women", "Female"),
category = str_replace(category, "Men", "Male"),
)
hiv_complete = hiv_complete %>%
mutate(
sex_cat = case_when(
category == "Male" ~ "Male",
category == "Female" ~ "Female",
category == "Transgender" ~ "Transgender",
TRUE ~ "All"
),
race_cat = case_when(
category == "Black" ~ "Black",
category == "Latino/Hispanic" ~ "Latino/Hispanic",
category == "White" ~ "White",
category == "Asian/Pacific Islander" ~ "Asian/Pacific Islander",
category == "Native American" ~ "Native American",
category == "Multiracial" ~ "Multiracial",
category == "Unknown race" ~ "Unknown race",
TRUE ~ "All"
),
borough_cat = case_when(
category == "Bronx" ~ "Bronx",
category == "Brooklyn" ~ "Brooklyn",
category == "Manhattan" ~ "Manhattan",
category == "Queens" ~ "Queens",
category == "Staten Island" ~ "Staten Island",
category == "Outside NYC" ~ "Outside NYC",
category == "Unknown borough" ~ "Unknown borough",
TRUE ~ "All"
),
age_cat = case_when(
category == "0-12" ~ "0-12",
category == "13-19" ~ "13-19",
category == "20-29" ~ "20-29",
category == "30-39" ~ "30-39",
category == "40-49" ~ "40-49",
category == "50-59" ~ "50-59",
category == "60+" ~ "60+",
TRUE ~ "All"
),
risk_cat = case_when(
category == "Injection drug use history (IDU)" ~ "Injection drug use history (IDU)",
category == "Heterosexual contact" ~ "Heterosexual contact",
category == "Male who have sex with men (MSM)" ~ "Male who have sex with men (MSM)",
category == "MSM-IDU" ~ "MSM-IDU",
category == "Perinatal" ~ "Perinatal",
category == "Transgender people with sexual contact" ~ "Transgender people with sexual contact",
category == "Unknown risk" ~ "Unknown risk",
category == "Other risk" ~ "Other risk",
TRUE ~ "All"
))
write.csv(hiv_complete,'./data/hiv_complete.csv')